# --- SCRIPT: INDIA MODEL (v22 - Granger Fix) ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
import os
import warnings

# Silence every warning category for the rest of the run so library warnings
# do not clutter the console output.
warnings.filterwarnings('ignore')

# Plot styling: reset matplotlib to its defaults first, then layer seaborn's
# "whitegrid" theme on top. seaborn is imported unconditionally at the top of
# this file, so no availability check is needed before calling it.
plt.style.use('default')
sns.set_theme(style="whitegrid")

print("--- STARTING SCRIPT: INDIA MODEL (v22 - Granger Fix) ---")

# Load every per-sport trend CSV, keyed by lower-case sport name.
# prepare_dataset() reads these same files with skiprows=2 to get past the
# "Category: All categories" preamble; the same setting is used here so the
# row counts printed below describe the same parsed data.
# File names follow the pattern "<Sport>_India.csv" (e.g. "F1_India.csv").
csv_files = {
    sport: pd.read_csv(f"{sport.capitalize()}_India.csv", skiprows=2)
    for sport in ('basketball', 'cricket', 'f1', 'football', 'golf', 'tennis')
}

print("\nAvailable datasets:")
for name, df in csv_files.items():
    print(f"{name.capitalize()}: {len(df)} rows")

# Prepare the combined dataset
def prepare_dataset():
    # Initialize a list to store processed dataframes
    processed_dfs = []
    
    # Process each sport's data
    for sport_name, df in csv_files.items():
        try:
            # Skip the first row which contains "Category: All categories"
            df = pd.read_csv(f"{sport_name.capitalize()}_India.csv", skiprows=2)
            
            # Rename columns
            df.columns = ['Month', f'{sport_name.capitalize()}_Trend']
            
            # Convert to datetime
            df['Month'] = pd.to_datetime(df['Month'])
            df = df.set_index('Month')
            
            processed_dfs.append(df)
            print(f"Processed {sport_name} data: {len(df)} rows")
        except Exception as e:
            print(f"Error processing {sport_name}: {e}")
    
    if processed_dfs:
        # Combine all trends
        combined_df = pd.concat(processed_dfs, axis=1)
        
        # Calculate the mean trend across all sports
        combined_df['MX_LX_Ratio'] = combined_df.mean(axis=1)
        
        # Resample to quarterly frequency
        model_df = combined_df.resample('QE').mean()
        
        # Add simulated GDP growth rate (placeholder)
        # Using a more realistic GDP simulation
        np.random.seed(42)  # For reproducibility
        baseline = 2.0  # Average growth rate
        volatility = 0.5  # Standard deviation
        trend = np.linspace(-0.5, 0.5, len(model_df))  # Slight trend
        noise = np.random.normal(0, volatility, len(model_df))
        model_df['GDP_GROWTH'] = baseline + trend + noise
        
        return model_df
    else:
        raise ValueError("No valid data found in CSV files")

# Create the model dataset.
# Any failure while building it is fatal for the rest of the script, so the
# error is reported and the process stops with a non-zero exit status.
try:
    model_df = prepare_dataset()
    print("\nCombined dataset created successfully:")
    print(f"Time period: {model_df.index.min()} to {model_df.index.max()}")
    print(f"Number of quarters: {len(model_df)}")
except Exception as e:
    print(f"Error creating dataset: {e}")
    # SystemExit is raised directly instead of calling the interactive
    # helper exit(): it needs no site module and lets callers see the failure
    # through the exit code.
    raise SystemExit(1)

# --- 2. India Granger causality test: trend ratio -> GDP growth ---
# NOTE: GDP_GROWTH is simulated in prepare_dataset(), so the p-value printed
# here is a placeholder result until real GDP data is plugged in.
print("\n--- Running India Granger Causality Test (Fixed) ---")

try:
    # The VAR is fitted on a stationary series, so run an ADF test on the
    # ratio first and difference it once if the test does not reject.
    ratio = model_df['MX_LX_Ratio']
    adf_test = adfuller(ratio.dropna())
    p_adf = adf_test[1]
    print(f"ADF test p-value: {p_adf:.4f}.")

    needs_diff = p_adf > 0.05
    print(
        "Ratio is non-stationary. Using first difference." if needs_diff
        else "Ratio is stationary. Using as-is."
    )
    model_df['Ratio_stationary'] = ratio.diff() if needs_diff else ratio

    # Two-variable system only: GDP growth and the (possibly differenced)
    # ratio. Rows with a missing value in either column are dropped.
    var_data = model_df[['GDP_GROWTH', 'Ratio_stationary']].dropna()

    # Only fit when there is data left and neither series is constant.
    has_signal = (
        not var_data.empty
        and var_data['GDP_GROWTH'].var() != 0
        and var_data['Ratio_stationary'].var() != 0
    )
    if has_signal:
        # Lag order (up to 4) is selected by AIC.
        var_model = VAR(var_data)
        var_results = var_model.fit(maxlags=4, ic='aic')

        # H0: Ratio_stationary does NOT Granger-cause GDP_GROWTH (F-test).
        test_result = var_results.test_causality('GDP_GROWTH', 'Ratio_stationary', kind='f')
        print(f"\nGranger Test (H0: GSSI-India does not cause GDP Growth):")
        print(f"P-value: {test_result.pvalue:.4f}")
    else:
        print("--- Granger Failed: Data is empty or has zero variance after dropping NaNs ---")

except Exception as e:
    # Any failure in the ADF test or the VAR fit is reported, not re-raised,
    # so the script still reaches its closing banner.
    print(f"--- Granger Causality Failed: {e} ---")

print("\n--- INDIA ANALYSIS COMPLETE ---")